Absolute Value
total_NGRT_school_data %>%
filter(is.na(Student_Name) == F) %>%
group_by(Origin_School, below_11_years_old, Year.x) %>%
summarise(value = n()) %>%
ungroup() %>%
mutate(Origin_School = ifelse(value<2, "Small Schools (< 2 Students)",Origin_School)) %>%
group_by(Origin_School, below_11_years_old, Year.x) %>%
summarise(total_school_amount = sum(value)) %>%
ungroup() %>%
ggplot(aes(x = fct_reorder(Origin_School, -total_school_amount), y = total_school_amount, fill = fct_rev(below_11_years_old)))+
geom_bar(stat = "identity", color = "black")+
facet_grid(Year.x~., scales = "free")+
theme_bw()+
theme(axis.text.x = element_text(angle=60, hjust=1), legend.position = "none")+
scale_y_continuous(name="Frequency", limits=c(0, 60), breaks = seq(0,60,10))+
labs(x = "Primary School",
title = "Distribution of Students Below a 11 Year Old Reading Age")

Percent
total_NGRT_school_data %>%
filter(is.na(Student_Name) == F) %>%
group_by(Origin_School, below_11_years_old, Year.x) %>%
summarise(value = n()) %>%
ungroup() %>%
mutate(Origin_School = ifelse(value<2, "Small Schools (< 2 Students)",Origin_School)) %>%
group_by(Origin_School, below_11_years_old, Year.x) %>%
summarise(total_school_amount = sum(value)) %>%
ungroup() %>%
group_by(Origin_School, Year.x) %>%
mutate(percent = round(total_school_amount/sum(total_school_amount)*100,2))%>%
ggplot(aes(x = fct_reorder(Origin_School, -total_school_amount), y = percent, fill = fct_rev(below_11_years_old)))+
geom_bar(stat = "identity", color = "black")+
facet_grid(Year.x~., scales = "free")+
theme_bw()+
theme(axis.text.x = element_text(angle=60, hjust=1), legend.position = "none")+
scale_y_continuous(name="Percent (%)", limits=c(0, 100), breaks = seq(0,100,10))+
labs(x = "Primary School",
title = "Proportion of Students Below 11 Year Old Reading Age",
subtitle = "Last 4 Years")

Inspecting Overall Reading Age
The NGRT dataset provided to the school using a 95% confidence interval. This means that if the test was repeated 100 times, student reading agaes would flucuate between the upper and lower CI interval 95 times (95% accuracy rating). For this reason, I only used the Overall Reading Age was used to inspect performance of the students over the last 4 years. However, the dataset that was provided by the school was not in an ideal formate and utilised a 12 point scale between each year from 10:00 to 10:11.
To inspect distribution of the reading age by a variety of factors, I needed to transform this categorical scale to a numeric scale. To achieve this, I separated each reading age into years and months using the “:”, added 1 to each of the months to get rid of the zero values, remove the plus signs (17:00+) from high reading ages, divded months by 13 to get a decimal value and added that decimal value to the years column. The reason, I did not divide by 13 was becuase when I added 1 to to a value of 11 and then divided by 12 it returned a value of 1 thus, resulting in an incorrect reading age (increasing reading age by 1 and not 0.92). Also all student above 17 years old were grouped together because the upper limiter was not specified.
all_metrics <- rbind(metrics_2018, metrics_2019, metrics_2020, metrics_2021)
rownames(all_metrics) <- c("2018","2019","2020","2021")
all_metrics$Max <- "17:00+"
all_metrics<- as.data.frame(t(all_metrics))
knitr::kable(all_metrics, align = "lccrr", digits = 2)
| Max |
17:00+ |
17:00+ |
17:00+ |
17:00+ |
| Q3 |
14.308 |
14.077 |
14.308 |
13.692 |
| Median |
12.154 |
12.923 |
12.154 |
11.615 |
| Q1 |
10.30800 |
10.92300 |
9.25025 |
9.42350 |
| Min |
5.308 |
5.923 |
5.154 |
5.077 |
total_NGRT_school_data %>%
ggplot(aes(x = Year.x, y = Years_as_numeric, fill = Year.x))+
geom_boxplot()+
scale_y_continuous(name = "Reading Age", limits = c(5.0,17.5), breaks = seq(5.0,17.5,0.5))+
theme_bw()

Distribution By Year
2018
hist(total_NGRT_school_data$Years_as_numeric[which(total_NGRT_school_data$Year.x == "2018")], xaxt = "n", main = "Distribution of Reading Age for 2018", xlab = "Reading Age")
axis(side = 1, at = seq(4,18,1))

2019
hist(total_NGRT_school_data$Years_as_numeric[which(total_NGRT_school_data$Year.x == "2019")], xaxt = "n", main = "Distribution of Reading Age for 2019", xlab = "Reading Age")
axis(side = 1, at = seq(4,18,1))

2020
hist(total_NGRT_school_data$Years_as_numeric[which(total_NGRT_school_data$Year.x == "2020")], xaxt = "n", main = "Distribution of Reading Age for 2020", xlab = "Reading Age")
axis(side = 1, at = seq(4,18,1))

2021
hist(total_NGRT_school_data$Years_as_numeric[which(total_NGRT_school_data$Year.x == "2021")], xaxt = "n", main = "Distribution of Reading Age for 2021", xlab = "Reading Age")
axis(side = 1, at = seq(4,18,1))

Past 4 Years Combined
hist(total_NGRT_school_data$Years_as_numeric, xaxt = "n", main = "Distribution of Reading Age", xlab = "Reading Age")
axis(side = 1, at = seq(4,18,1))

All
library(knitr)
all_data <- total_NGRT_school_data %>%
select(Year.x,Years_as_numeric) %>%
mutate(Over_15 = ifelse(Years_as_numeric>=15,1,0),
btw_15_and_13 = ifelse(Years_as_numeric<15 & Years_as_numeric>=13,1,0),
btw_13_and_11 = ifelse(Years_as_numeric<13 & Years_as_numeric>=11,1,0),
btw_11_and_9 = ifelse(Years_as_numeric<11 & Years_as_numeric>=9,1,0),
btw_9_and_7 = ifelse(Years_as_numeric<9 & Years_as_numeric>=7,1,0),
Under_7 = ifelse(Years_as_numeric<7,1,0)) %>%
group_by(Year.x) %>%
summarise("< 7" = sum(Under_7)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"< 9 and >= 7" = sum(btw_9_and_7)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"< 11 and >= 9" = sum(btw_11_and_9)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"< 13 and >= 11" = sum(btw_13_and_11)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"< 15 and >= 13" = sum(btw_15_and_13)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
">= 15" = sum(Over_15)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100)
show_all_data <- all_data
colnames(show_all_data) <- c("Year", "$<7$", "$\\geqslant7$ and $<9$","$\\geqslant9$ and $<11$ ","$\\geqslant11$ and $<13$", "$\\geqslant13$ and $<15$","$\\geqslant15$")
kable(show_all_data, align = "c", digits = 2)
| 2018 |
2.01 |
12.08 |
19.46 |
28.19 |
20.13 |
18.12 |
| 2019 |
1.96 |
9.80 |
15.69 |
23.53 |
33.33 |
15.69 |
| 2020 |
6.25 |
15.97 |
15.97 |
22.92 |
18.06 |
20.83 |
| 2021 |
3.75 |
17.50 |
19.38 |
28.12 |
16.88 |
14.37 |
library(reshape2)
melt(all_data, id = "Year.x") %>%
ggplot(aes(x = variable, y = value))+
geom_bar(stat = "identity", color = "black", fill = "steelblue")+
facet_grid(~Year.x)+
scale_y_continuous(name="Percentage (%)", limits=c(0, 38), breaks = seq(0,35,5))+
theme_bw()+
theme(axis.text.x = element_text(angle=60, hjust=1))

Male
Male_data <- total_NGRT_school_data %>%
select(Gender,Year.x,Years_as_numeric) %>%
filter(Gender == "Male") %>%
mutate(Over_15 = ifelse(Years_as_numeric>=15,1,0),
btw_15_and_13 = ifelse(Years_as_numeric<15 & Years_as_numeric>=13,1,0),
btw_13_and_11 = ifelse(Years_as_numeric<13 & Years_as_numeric>=11,1,0),
btw_11_and_9 = ifelse(Years_as_numeric<11 & Years_as_numeric>=9,1,0),
btw_9_and_7 = ifelse(Years_as_numeric<9 & Years_as_numeric>=7,1,0),
Under_7 = ifelse(Years_as_numeric<7,1,0)) %>%
group_by(Year.x) %>%
summarise("< 7" = sum(Under_7)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"< 9 and >= 7" = sum(btw_9_and_7)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"< 11 and >= 9" = sum(btw_11_and_9)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"< 13 and >= 11" = sum(btw_13_and_11)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"< 15 and >= 13" = sum(btw_15_and_13)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
">= 15" = sum(Over_15)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100)
show_male_data <- Male_data
colnames(show_male_data) <- c("Year", "$<7$", "$\\geqslant7$ and $<9$","$\\geqslant9$ and $<11$ ","$\\geqslant11$ and $<13$", "$\\geqslant13$ and $<15$","$\\geqslant15$")
kable(show_male_data, align = "c", digits = 2)
| 2018 |
1.32 |
15.79 |
17.11 |
30.26 |
19.74 |
15.79 |
| 2019 |
3.77 |
11.32 |
15.09 |
22.64 |
35.85 |
11.32 |
| 2020 |
7.58 |
19.70 |
15.15 |
21.21 |
18.18 |
18.18 |
| 2021 |
7.50 |
21.25 |
22.50 |
20.00 |
13.75 |
15.00 |
melt(Male_data, id = "Year.x") %>%
ggplot(aes(x = variable, y = value))+
geom_bar(stat = "identity", color = "black", fill = "steelblue")+
facet_grid(~Year.x)+
scale_y_continuous(name="Percentage (%)", limits=c(0, 38), breaks = seq(0,35,5))+
theme_bw()+
theme(axis.text.x = element_text(angle=60, hjust=1))

Female
female_data <- total_NGRT_school_data %>%
select(Gender,Year.x,Years_as_numeric) %>%
filter(Gender == "Female") %>%
mutate(Over_15 = ifelse(Years_as_numeric>=15,1,0),
btw_15_and_13 = ifelse(Years_as_numeric<15 & Years_as_numeric>=13,1,0),
btw_13_and_11 = ifelse(Years_as_numeric<13 & Years_as_numeric>=11,1,0),
btw_11_and_9 = ifelse(Years_as_numeric<11 & Years_as_numeric>=9,1,0),
btw_9_and_7 = ifelse(Years_as_numeric<9 & Years_as_numeric>=7,1,0),
Under_7 = ifelse(Years_as_numeric<7,1,0)) %>%
group_by(Year.x) %>%
summarise("< 7" = sum(Under_7)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"< 9 and >= 7" = sum(btw_9_and_7)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"< 11 and >= 9" = sum(btw_11_and_9)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"< 13 and >= 11" = sum(btw_13_and_11)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"< 15 and >= 13" = sum(btw_15_and_13)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
">= 15" = sum(Over_15)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100)
show_female_data <- female_data
colnames(show_female_data) <- c("Year", "$<7$", "$\\geqslant7$ and $<9$","$\\geqslant9$ and $<11$ ","$\\geqslant11$ and $<13$", "$\\geqslant13$ and $<15$","$\\geqslant15$")
kable(show_female_data, align = "c", digits = 2)
| 2018 |
2.74 |
8.22 |
21.92 |
26.03 |
20.55 |
20.55 |
| 2019 |
0.00 |
8.16 |
16.33 |
24.49 |
30.61 |
20.41 |
| 2020 |
5.13 |
12.82 |
16.67 |
24.36 |
17.95 |
23.08 |
| 2021 |
0.00 |
13.75 |
16.25 |
36.25 |
20.00 |
13.75 |
melt(female_data, id = "Year.x") %>%
ggplot(aes(x = variable, y = value))+
geom_bar(stat = "identity", color = "black", fill = "pink")+
scale_y_continuous(name="Percentage (%)", limits=c(0, 38), breaks = seq(0,35,5))+
facet_grid(~Year.x)+
theme_bw()+
theme(axis.text.x = element_text(angle=60, hjust=1))

Indigenous Status
indigenous_data <- total_NGRT_school_data %>%
select(Indigenous,Year.x,Years_as_numeric) %>%
filter(Indigenous == "Y") %>%
mutate(Over_15 = ifelse(Years_as_numeric>15,1,0),
btw_15_and_13 = ifelse(Years_as_numeric<=15 & Years_as_numeric>13,1,0),
btw_13_and_11 = ifelse(Years_as_numeric<=13 & Years_as_numeric>11,1,0),
btw_11_and_9 = ifelse(Years_as_numeric<=11 & Years_as_numeric>9,1,0),
btw_9_and_7 = ifelse(Years_as_numeric<=9 & Years_as_numeric>7,1,0),
Under_7 = ifelse(Years_as_numeric<=7,1,0)) %>%
group_by(Year.x) %>%
summarise("< 7" = sum(Under_7)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"<= 9 and > 7" = sum(btw_9_and_7)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"<= 11 and > 9" = sum(btw_11_and_9)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"<= 13 and > 11" = sum(btw_13_and_11)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"<= 15 and > 13" = sum(btw_15_and_13)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"> 15" = sum(Over_15)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100)
show_indig_data <- indigenous_data
colnames(show_indig_data) <- c("Year", "$<7$", "$\\geqslant7$ and $<9$","$\\geqslant9$ and $<11$ ","$\\geqslant11$ and $<13$", "$\\geqslant13$ and $<15$","$\\geqslant15$")
kable(show_indig_data, align = "c", digits = 2)
| 2018 |
6.67 |
16.67 |
40.00 |
16.67 |
16.67 |
3.33 |
| 2019 |
5.00 |
20.00 |
20.00 |
20.00 |
30.00 |
5.00 |
| 2020 |
10.34 |
41.38 |
13.79 |
20.69 |
6.90 |
6.90 |
| 2021 |
3.03 |
33.33 |
33.33 |
24.24 |
6.06 |
0.00 |
melt(indigenous_data, id = "Year.x") %>%
ggplot(aes(x = variable, y = value))+
geom_bar(stat = "identity", color = "black", fill = "darkgreen")+
facet_grid(~Year.x)+
scale_y_continuous(name="Percentage (%)", limits=c(0, 42), breaks = seq(0,40,5))+
theme_bw()+
theme(axis.text.x = element_text(angle=60, hjust=1))

Non-Indigenous Status
nonindigenous_data <- total_NGRT_school_data %>%
select(Indigenous,Year.x,Years_as_numeric) %>%
filter(Indigenous == "N") %>%
mutate(Over_15 = ifelse(Years_as_numeric>15,1,0),
btw_15_and_13 = ifelse(Years_as_numeric<=15 & Years_as_numeric>13,1,0),
btw_13_and_11 = ifelse(Years_as_numeric<=13 & Years_as_numeric>11,1,0),
btw_11_and_9 = ifelse(Years_as_numeric<=11 & Years_as_numeric>9,1,0),
btw_9_and_7 = ifelse(Years_as_numeric<=9 & Years_as_numeric>7,1,0),
Under_7 = ifelse(Years_as_numeric<=7,1,0)) %>%
group_by(Year.x) %>%
summarise("< 7" = sum(Under_7)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"<= 9 and > 7" = sum(btw_9_and_7)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"<= 11 and > 9" = sum(btw_11_and_9)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"<= 13 and > 11" = sum(btw_13_and_11)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"<= 15 and > 13" = sum(btw_15_and_13)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
"> 15" = sum(Over_15)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100)
show_nonindig_data <- nonindigenous_data
colnames(show_nonindig_data) <- c("Year", "$<7$", "$\\geqslant7$ and $<9$","$\\geqslant9$ and $<11$ ","$\\geqslant11$ and $<13$", "$\\geqslant13$ and $<15$","$\\geqslant15$")
kable(show_nonindig_data, align = "c", digits = 2)
| 2018 |
0.86 |
10.34 |
14.66 |
31.03 |
21.55 |
21.55 |
| 2019 |
1.25 |
7.50 |
15.00 |
22.50 |
35.00 |
18.75 |
| 2020 |
5.22 |
9.57 |
16.52 |
23.48 |
20.87 |
24.35 |
| 2021 |
3.94 |
13.39 |
15.75 |
29.13 |
19.69 |
18.11 |
melt(nonindigenous_data, id = "Year.x") %>%
ggplot(aes(x = variable, y = value))+
geom_bar(stat = "identity", color = "black", fill = "orange")+
facet_grid(~Year.x)+
scale_y_continuous(name="Percentage (%)", limits=c(0, 42), breaks = seq(0,40,5))+
theme_bw()+
theme(axis.text.x = element_text(angle=60, hjust=1))
